package data

import (
	"bytes"
	"strings"

	"github.com/go-enry/go-enry/v2/regex"
)

// GeneratedCodeExtensions is the set of file extensions (including the
// leading dot) that always denote generated files, regardless of the file
// name or content.
var GeneratedCodeExtensions = map[string]struct{}{
	// XCode files
	".nib":             {},
	".xcworkspacedata": {},
	".xcuserstate":     {},
}

// GeneratedCodeNameMatcher is a predicate over a file name: it returns true
// when the file with the given name is considered generated.
type GeneratedCodeNameMatcher func(string) bool

// nameMatches builds a matcher that reports whether a file name matches the
// regular expression pattern. The pattern is compiled once, when the matcher
// is created, and the compiled expression is reused on every call.
func nameMatches(pattern string) GeneratedCodeNameMatcher {
	compiled := regex.MustCompile(pattern)
	var matcher GeneratedCodeNameMatcher = func(filename string) bool {
		return compiled.MatchString(filename)
	}
	return matcher
}

// nameContains builds a matcher that reports whether a file name contains
// pattern as a plain (non-regex) substring.
func nameContains(pattern string) GeneratedCodeNameMatcher {
	var matcher GeneratedCodeNameMatcher = func(filename string) bool {
		return strings.Contains(filename, pattern)
	}
	return matcher
}

// nameEndsWith builds a matcher that reports whether a file name ends with
// pattern, compared as a plain (non-regex) suffix.
func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
	var matcher GeneratedCodeNameMatcher = func(filename string) bool {
		return strings.HasSuffix(filename, pattern)
	}
	return matcher
}

// GeneratedCodeNameMatchers are all the matchers that check whether the code
// is generated based only on the file name. Each entry is built with
// nameMatches (regular expression), nameContains (substring) or nameEndsWith
// (suffix).
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
	// CocoaPods: anything under a Pods/ directory
	nameMatches(`(^Pods|\/Pods)\/`),

	// Carthage build output
	nameMatches(`(^|\/)Carthage\/Build\/`),

	// .NET designer file (case-insensitive match)
	nameMatches(`(?i)\.designer\.(cs|vb)$`),

	// Generated .NET SpecFlow feature file
	nameEndsWith(".feature.cs"),

	// Node modules
	nameContains("node_modules/"),

	// Go vendor: vendor/ followed by a domain-like path element
	nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),

	// Go lock
	nameEndsWith("Gopkg.lock"),
	nameEndsWith("glide.lock"),

	// Esy lock
	// NOTE(review): the '.' in "esy.lock" is not escaped, so it matches any
	// character there — confirm whether that is intended.
	nameMatches(`(^|\/)(\w+\.)?esy.lock$`),

	// NPM shrinkwrap
	nameEndsWith("npm-shrinkwrap.json"),

	// NPM package lock
	nameEndsWith("package-lock.json"),

	// Yarn Plug'n'Play: .pnp.js, .pnp.cjs or .pnp.mjs
	nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),

	// Godeps
	nameContains("Godeps/"),

	// Composer lock
	nameEndsWith("composer.lock"),

	// Generated by Zephir
	nameMatches(`.\.zep\.(?:c|h|php)$`),

	// Cargo lock
	nameEndsWith("Cargo.lock"),

	// Pipenv lock
	nameEndsWith("Pipfile.lock"),

	// GraphQL Relay
	nameContains("__generated__/"),
}

// GeneratedCodeMatcher checks whether the file with the given data is
// generated code. It receives the file path, its extension (the matchers in
// this file compare it against lower-case, dot-prefixed values such as ".js")
// and the file content, and returns true when the file looks generated.
type GeneratedCodeMatcher func(path, ext string, content []byte) bool

// GeneratedCodeMatchers is the list of all generated code matchers that
// rely on checking the content of the file to make the guess. Every matcher
// first filters on the extension (or path) it applies to and returns false
// for anything else.
// NOTE(review): how callers combine these results (presumably "any matcher
// returns true") is decided outside this file — confirm at the call site.
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
	isMinifiedFile,
	hasSourceMapReference,
	isSourceMap,
	isCompiledCoffeeScript,
	isGeneratedNetDocfile,
	isGeneratedJavaScriptPEGParser,
	isGeneratedPostScript,
	isGeneratedGo,
	isGeneratedProtobuf,
	isGeneratedJavaScriptProtocolBuffer,
	isGeneratedApacheThrift,
	isGeneratedJNIHeader,
	isVCRCassette,
	isCompiledCythonFile,
	isGeneratedModule,
	isGeneratedUnity3DMeta,
	isGeneratedRacc,
	isGeneratedJFlex,
	isGeneratedGrammarKit,
	isGeneratedRoxygen2,
	isGeneratedJison,
	isGeneratedGRPCCpp,
	isGeneratedDart,
	isGeneratedPerlPPPortHeader,
	isGeneratedGameMakerStudio,
	isGeneratedGimp,
	isGeneratedVisualStudio6,
	isGeneratedHaxe,
	isGeneratedHTML,
	isGeneratedJooq,
}

// canBeMinified reports whether ext is one of the extensions subject to the
// minification checks: ".js" or ".css". The comparison is exact and
// case-sensitive.
func canBeMinified(ext string) bool {
	switch ext {
	case ".js", ".css":
		return true
	default:
		return false
	}
}

// isMinifiedFile reports whether a .js or .css file looks minified: the
// average line length, computed with integer division over the lines visited
// by forEachLine, must be greater than 110 bytes. Files with any other
// extension, and files with no lines, are never considered minified.
func isMinifiedFile(path, ext string, content []byte) bool {
	if !canBeMinified(ext) {
		return false
	}

	var totalBytes, lineCount uint64
	forEachLine(content, func(l []byte) {
		lineCount++
		totalBytes += uint64(len(l))
	})

	// The lineCount check comes first so the division never sees a zero divisor.
	return lineCount != 0 && totalBytes/lineCount > 110
}

// sourceMapRegex matches a line that starts with a "//" or "/*" comment
// followed by "#" or "@" and " sourceURL"/" sourceMappingURL", or a line that
// contains "sourceURL=" anywhere.
var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)

// hasSourceMapReference reports whether a .js or .css file references a
// source map in one of the (up to) two last lines returned by getLines.
func hasSourceMapReference(_ string, ext string, content []byte) bool {
	if canBeMinified(ext) {
		tail := getLines(content, -2)
		for i := range tail {
			if sourceMapRegex.Match(tail[i]) {
				return true
			}
		}
	}

	return false
}

// sourceMapRegexps match the first line of a source map: either a JSON
// object starting with a numeric "version" key, or a "Begin line maps"
// comment header.
var sourceMapRegexps = []regex.EnryRegexp{
	regex.MustCompile(`^{"version":\d+,`),
	regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
}

// isSourceMap reports whether the file itself is a source map. A path ending
// in ".js.map" or ".css.map" is enough; otherwise the first line of the
// content must be non-empty and match one of sourceMapRegexps.
func isSourceMap(path, _ string, content []byte) bool {
	for _, suffix := range []string{".js.map", ".css.map"} {
		if strings.HasSuffix(path, suffix) {
			return true
		}
	}

	head := getFirstLine(content)
	if len(head) == 0 {
		return false
	}

	for i := range sourceMapRegexps {
		if sourceMapRegexps[i].Match(head) {
			return true
		}
	}

	return false
}

// isCompiledCoffeeScript reports whether a .js file looks like CoffeeScript
// compiler output. The file must be wrapped in the module closure
// ("(function() {" on the first line, "}).call(this);" followed by an empty
// line in the two lines returned by getLines(content, -2)) and its "var"
// lines must mention enough CoffeeScript-specific helper names.
func isCompiledCoffeeScript(path, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	head := getFirstLine(content)
	tail := getLines(content, -2)
	if len(tail) < 2 {
		return false
	}

	// tail is in reverse order: tail[0] is the later line, tail[1] the one
	// before it.
	wrapped := string(head) == "(function() {" &&
		string(tail[1]) == "}).call(this);" &&
		len(tail[0]) == 0
	if !wrapped {
		return false
	}

	var score int
	forEachLine(content, func(l []byte) {
		if !bytes.Contains(l, []byte("var ")) {
			return
		}

		// Underscored temporaries are likely CoffeeScript: one point each.
		score += countAppearancesInLine(l, "_fn", "_i", "_len", "_ref", "_results")

		// The bind/extend helpers are very CoffeeScript specific: three points each.
		score += 3 * countAppearancesInLine(l, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
	})

	// A score of 3 is required. The threshold is fairly arbitrary and may need
	// tweaking later.
	// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
	return score >= 3
}

// isGeneratedNetDocfile reports whether an .xml file looks like a generated
// .NET documentation file. The content is split on '\n' and must yield more
// than three pieces, with "<doc>" in the second, "<assembly>" in the third
// and "</doc>" in the second to last.
func isGeneratedNetDocfile(_, ext string, content []byte) bool {
	if ext != ".xml" {
		return false
	}

	pieces := bytes.Split(content, []byte("\n"))
	count := len(pieces)
	if count < 4 {
		return false
	}

	if !bytes.Contains(pieces[1], []byte("<doc>")) {
		return false
	}
	if !bytes.Contains(pieces[2], []byte("<assembly>")) {
		return false
	}
	return bytes.Contains(pieces[count-2], []byte("</doc>"))
}

// pegJavaScriptGeneratedRegex matches "Generated by PEG.js" inside the first
// block comment of the text it is applied to.
var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)

// isGeneratedJavaScriptPEGParser reports whether a .js file is a parser
// generated by PEG.js. Such parsers carry a marker comment near the top of
// the file, so only the first five lines are inspected; they are concatenated
// without separators before the regex is applied.
func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	head := bytes.Join(getLines(content, 5), nil)
	return pegJavaScriptGeneratedRegex.Match(head)
}

// postScriptType1And42Regex matches a line that starts (after optional
// whitespace) with "currentfile eexec" or "/sfnts [".
var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)

// postScriptRegexes are applied to the "%%Creator:" line: any digit or one of
// the listed generator names marks the file as generated.
var postScriptRegexes = []regex.EnryRegexp{
	regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
	regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
}

// isGeneratedPostScript reports whether a .ps, .eps or .pfa file was produced
// by a tool rather than written by hand.
func isGeneratedPostScript(_, ext string, content []byte) bool {
	switch ext {
	case ".ps", ".eps", ".pfa":
	default:
		return false
	}

	// Type 1 and Type 42 fonts converted to PostScript are stored as
	// hex-encoded byte streams, which are always preceded by the `eexec`
	// operator (Type 1) or the `/sfnts` key (Type 42).
	if postScriptType1And42Regex.Match(content) {
		return true
	}

	// The "%%Creator:" comment names the author/generator of the file. When
	// present it should be within the first few lines.
	var creator []byte
	for _, l := range getLines(content, 10) {
		if bytes.HasPrefix(l, []byte("%%Creator: ")) {
			creator = l
			break
		}
	}
	if len(creator) == 0 {
		return false
	}

	// EAGLE writes no version number into the creator line, but it does
	// prepend its name to the document's "%%Title" field.
	if bytes.Contains(creator, []byte("EAGLE")) {
		titlePrefix := []byte("%%Title: EAGLE Drawing ")
		for _, l := range getLines(content, 5) {
			if bytes.HasPrefix(l, titlePrefix) {
				return true
			}
		}
	}

	// Most generators include their version number, whereas human authors'
	// or companies' names rarely contain digits. So digits in the creator
	// line count as a hit, as do a few known generators that omit a version.
	for i := range postScriptRegexes {
		if postScriptRegexes[i].Match(creator) {
			return true
		}
	}

	return false
}

// isGeneratedGo reports whether a .go file contains "Code generated by" in
// one of its first 40 lines. Files with fewer than two lines never match.
func isGeneratedGo(_, ext string, content []byte) bool {
	if ext != ".go" {
		return false
	}

	head := getLines(content, 40)
	if len(head) < 2 {
		return false
	}

	marker := []byte("Code generated by")
	for i := range head {
		if bytes.Contains(head[i], marker) {
			return true
		}
	}

	return false
}

// protoExtensions is the set of extensions checked for the protocol buffer
// compiler banner.
var protoExtensions = map[string]struct{}{
	".py":   {},
	".java": {},
	".h":    {},
	".cc":   {},
	".cpp":  {},
	".m":    {},
	".rb":   {},
	".php":  {},
}

// isGeneratedProtobuf reports whether a file with one of protoExtensions
// carries the protocol buffer compiler banner in its first three lines.
// Files with fewer than two lines never match.
func isGeneratedProtobuf(_, ext string, content []byte) bool {
	if _, known := protoExtensions[ext]; !known {
		return false
	}

	head := getLines(content, 3)
	if len(head) < 2 {
		return false
	}

	banner := []byte("Generated by the protocol buffer compiler.  DO NOT EDIT!")
	for i := range head {
		if bytes.Contains(head[i], banner) {
			return true
		}
	}

	return false
}

// isGeneratedJavaScriptProtocolBuffer reports whether the sixth line of a .js
// file contains "GENERATED CODE -- DO NOT EDIT!".
func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	head := getLines(content, 6)
	return len(head) >= 6 &&
		bytes.Contains(head[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
}

// apacheThriftExtensions is the set of extensions checked for the Thrift
// compiler banner.
var apacheThriftExtensions = map[string]struct{}{
	".rb":   {},
	".py":   {},
	".go":   {},
	".js":   {},
	".m":    {},
	".java": {},
	".h":    {},
	".cc":   {},
	".cpp":  {},
	".php":  {},
}

// isGeneratedApacheThrift reports whether a file with one of
// apacheThriftExtensions contains "Autogenerated by Thrift Compiler" in its
// first six lines.
func isGeneratedApacheThrift(_, ext string, content []byte) bool {
	if _, known := apacheThriftExtensions[ext]; !known {
		return false
	}

	banner := []byte("Autogenerated by Thrift Compiler")
	head := getLines(content, 6)
	for i := range head {
		if bytes.Contains(head[i], banner) {
			return true
		}
	}

	return false
}

// isGeneratedJNIHeader reports whether a .h file starts with the
// machine-generated banner on its first line and includes <jni.h> on its
// second line.
func isGeneratedJNIHeader(_, ext string, content []byte) bool {
	if ext != ".h" {
		return false
	}

	head := getLines(content, 2)
	if len(head) < 2 {
		return false
	}

	if !bytes.Contains(head[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) {
		return false
	}
	return bytes.Contains(head[1], []byte("#include <jni.h>"))
}

// isVCRCassette reports whether a .yml file is a VCR cassette: the second of
// the two lines returned by getLines(content, -2) must contain
// "recorded_with: VCR".
func isVCRCassette(_, ext string, content []byte) bool {
	if ext != ".yml" {
		return false
	}

	tail := getLines(content, -2)
	return len(tail) >= 2 &&
		bytes.Contains(tail[1], []byte("recorded_with: VCR"))
}

// isCompiledCythonFile reports whether the first line of a .c or .cpp file
// contains "Generated by Cython".
func isCompiledCythonFile(_, ext string, content []byte) bool {
	switch ext {
	case ".c", ".cpp":
	default:
		return false
	}

	head := getLines(content, 1)
	return len(head) > 0 && bytes.Contains(head[0], []byte("Generated by Cython"))
}

// isGeneratedModule reports whether the first line of a .mod file contains
// either "PCBNEW-LibModule-V" or "GFORTRAN module version '".
func isGeneratedModule(_, ext string, content []byte) bool {
	if ext != ".mod" {
		return false
	}

	head := getLines(content, 1)
	if len(head) == 0 {
		return false
	}

	first := head[0]
	if bytes.Contains(first, []byte("PCBNEW-LibModule-V")) {
		return true
	}
	return bytes.Contains(first, []byte("GFORTRAN module version '"))
}

// isGeneratedUnity3DMeta reports whether the first line of a .meta file
// contains "fileFormatVersion: ".
func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
	if ext != ".meta" {
		return false
	}

	head := getLines(content, 1)
	return len(head) > 0 && bytes.Contains(head[0], []byte("fileFormatVersion: "))
}

// isGeneratedRacc reports whether the third line of a .rb file starts with
// "# This file is automatically generated by Racc".
func isGeneratedRacc(_, ext string, content []byte) bool {
	if ext != ".rb" {
		return false
	}

	head := getLines(content, 3)
	return len(head) >= 3 &&
		bytes.HasPrefix(head[2], []byte("# This file is automatically generated by Racc"))
}

// isGeneratedJFlex reports whether the first line of a .java file starts
// with "/* The following code was generated by JFlex ".
func isGeneratedJFlex(_, ext string, content []byte) bool {
	if ext != ".java" {
		return false
	}

	head := getLines(content, 1)
	return len(head) > 0 &&
		bytes.HasPrefix(head[0], []byte("/* The following code was generated by JFlex "))
}

// isGeneratedGrammarKit reports whether the first line of a .java file
// contains "// This is a generated file. Not intended for manual editing.".
func isGeneratedGrammarKit(_, ext string, content []byte) bool {
	if ext != ".java" {
		return false
	}

	head := getLines(content, 1)
	return len(head) > 0 &&
		bytes.Contains(head[0], []byte("// This is a generated file. Not intended for manual editing."))
}

// isGeneratedRoxygen2 reports whether the first line of a .rd file contains
// "% Generated by roxygen2: do not edit by hand".
func isGeneratedRoxygen2(_, ext string, content []byte) bool {
	if ext != ".rd" {
		return false
	}

	head := getLines(content, 1)
	return len(head) > 0 &&
		bytes.Contains(head[0], []byte("% Generated by roxygen2: do not edit by hand"))
}

// isGeneratedJison reports whether the first line of a .js file contains
// either "/* parser generated by jison " or "/* generated by jison-lex ".
func isGeneratedJison(_, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	head := getLines(content, 1)
	if len(head) == 0 {
		return false
	}

	first := head[0]
	if bytes.Contains(first, []byte("/* parser generated by jison ")) {
		return true
	}
	return bytes.Contains(first, []byte("/* generated by jison-lex "))
}

// isGeneratedGRPCCpp reports whether the first line of a .cpp, .hpp, .h or
// .cc file contains "// Generated by the gRPC".
func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
	if ext != ".cpp" && ext != ".hpp" && ext != ".h" && ext != ".cc" {
		return false
	}

	head := getLines(content, 1)
	return len(head) > 0 && bytes.Contains(head[0], []byte("// Generated by the gRPC"))
}

// dartRegex matches "generated code", two or three non-word characters, then
// "do not modify". It is applied to lower-cased input.
var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)

// isGeneratedDart reports whether the lower-cased first line of a .dart file
// matches dartRegex.
func isGeneratedDart(_, ext string, content []byte) bool {
	if ext != ".dart" {
		return false
	}

	head := getLines(content, 1)
	if len(head) == 0 {
		return false
	}

	lowered := bytes.ToLower(head[0])
	return dartRegex.Match(lowered)
}

// isGeneratedPerlPPPortHeader reports whether a file whose name ends in
// "ppport.h" has at least ten lines and contains
// "Automatically created by Devel::PPPort" on its ninth line.
func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
	if !strings.HasSuffix(name, "ppport.h") {
		return false
	}

	head := getLines(content, 10)
	return len(head) >= 10 &&
		bytes.Contains(head[8], []byte("Automatically created by Devel::PPPort"))
}

var (
	// gameMakerStudioFirstLineRegex matches a first line that starts with a
	// three-part single-digit version and later contains "|{".
	gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
	// gameMakerStudioThirdLineRegex matches a "modelName" key whose value
	// starts with "GM".
	gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
)

// isGeneratedGameMakerStudio reports whether a .yy or .yyp file has at least
// three lines and either its third line matches
// gameMakerStudioThirdLineRegex or its first line matches
// gameMakerStudioFirstLineRegex.
func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
	switch ext {
	case ".yy", ".yyp":
	default:
		return false
	}

	head := getLines(content, 3)
	if len(head) < 3 {
		return false
	}

	if gameMakerStudioThirdLineRegex.Match(head[2]) {
		return true
	}
	return gameMakerStudioFirstLineRegex.Match(head[0])
}

// gimpRegexes match the header comments GIMP writes into C-source image
// dumps (.c) and header image files (.h).
var gimpRegexes = []regex.EnryRegexp{
	regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
	regex.MustCompile(`\/\*  GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h  \*\/`),
}

// isGeneratedGimp reports whether the first line of a .c or .h file matches
// one of gimpRegexes.
func isGeneratedGimp(_, ext string, content []byte) bool {
	switch ext {
	case ".c", ".h":
	default:
		return false
	}

	head := getLines(content, 1)
	if len(head) == 0 {
		return false
	}

	first := head[0]
	for i := range gimpRegexes {
		if gimpRegexes[i].Match(first) {
			return true
		}
	}

	return false
}

// isGeneratedVisualStudio6 reports whether one of the first three lines of a
// .dsp file contains "# Microsoft Developer Studio Generated Build File".
func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
	if ext != ".dsp" {
		return false
	}

	banner := []byte("# Microsoft Developer Studio Generated Build File")
	head := getLines(content, 3)
	for i := range head {
		if bytes.Contains(head[i], banner) {
			return true
		}
	}

	return false
}

// haxeExtensions is the set of extensions checked for the Haxe banner.
var haxeExtensions = map[string]struct{}{
	".js":   {},
	".py":   {},
	".lua":  {},
	".cpp":  {},
	".h":    {},
	".java": {},
	".cs":   {},
	".php":  {},
}

// isGeneratedHaxe reports whether a file with one of haxeExtensions contains
// "Generated by Haxe" in its first three lines.
func isGeneratedHaxe(_, ext string, content []byte) bool {
	if _, known := haxeExtensions[ext]; !known {
		return false
	}

	banner := []byte("Generated by Haxe")
	head := getLines(content, 3)
	for i := range head {
		if bytes.Contains(head[i], banner) {
			return true
		}
	}

	return false
}

var (
	// doxygenRegex matches the "Generated by Doxygen <version>" HTML comment.
	doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
	// htmlMetaRegex matches a whole <meta ...> tag.
	htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`)
	// htmlMetaContentRegex extracts name/content/value attributes and their
	// (optionally quoted) values from a <meta> tag.
	htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
	// orgModeMetaRegex matches "org" and "mode" separated by whitespace.
	orgModeMetaRegex = regex.MustCompile(`org\s+mode`)
)

// isGeneratedHTML reports whether an .html, .htm or .xhtml file was produced
// by a known generator. Only the first 30 lines are inspected, looking for a
// pkgdown, mandoc or Doxygen comment, or a <meta name="generator"> tag that
// names one of the recognised tools.
func isGeneratedHTML(_, ext string, content []byte) bool {
	switch ext {
	case ".html", ".htm", ".xhtml":
	default:
		return false
	}

	head := getLines(content, 30)

	// Pkgdown: the marker must be on one of the first two lines, and the file
	// must have at least two lines.
	if len(head) >= 2 {
		pkgdown := []byte("<!-- Generated by pkgdown: do not edit by hand -->")
		if bytes.Contains(head[0], pkgdown) || bytes.Contains(head[1], pkgdown) {
			return true
		}
	}

	// Mandoc: the third line starts with the marker.
	if len(head) > 2 {
		mandoc := []byte("<!-- This is an automatically generated file.")
		if bytes.HasPrefix(head[2], mandoc) {
			return true
		}
	}

	// Doxygen: the marker may be on any inspected line.
	for i := range head {
		if doxygenRegex.Match(head[i]) {
			return true
		}
	}

	// HTML tag: <meta name="generator" content="" />
	// The inspected lines are lower-cased and joined with spaces, with line
	// break characters removed, so a tag spanning several lines still matches.
	joined := bytes.ToLower(bytes.Join(head, []byte{' '}))
	joined = bytes.ReplaceAll(joined, []byte{'\n'}, []byte{})
	joined = bytes.ReplaceAll(joined, []byte{'\r'}, []byte{})

	generators := []string{
		"jlatex2html",
		"latex2html",
		"groff",
		"makeinfo",
		"texi2html",
		"ronn",
	}

	for _, tag := range htmlMetaRegex.FindAll(joined, -1) {
		// The last occurrence of each attribute within the tag wins.
		attrs := make(map[string]string, 3)
		for _, sub := range htmlMetaContentRegex.FindAllStringSubmatch(string(tag), -1) {
			attrs[sub[1]] = sub[2]
		}

		// "value" takes precedence over "content" when both are present.
		generator := attrs["value"]
		if generator == "" {
			generator = attrs["content"]
		}

		if strings.Trim(attrs["name"], `"'`) != "generator" {
			continue
		}
		generator = strings.Trim(generator, `"'`)
		if generator == "" {
			continue
		}

		for _, tool := range generators {
			if strings.Contains(generator, tool) {
				return true
			}
		}
		if orgModeMetaRegex.MatchString(generator) {
			return true
		}
	}

	return false
}

// isGeneratedJooq reports whether one of the first two lines of a .java file
// contains "This file is generated by jOOQ.".
func isGeneratedJooq(_, ext string, content []byte) bool {
	if ext != ".java" {
		return false
	}

	banner := []byte("This file is generated by jOOQ.")
	head := getLines(content, 2)
	for i := range head {
		if bytes.Contains(head[i], banner) {
			return true
		}
	}

	return false
}

// getFirstLine returns the first line of content as produced by
// getLines(content, 1), or nil when getLines yields no line at all.
func getFirstLine(content []byte) []byte {
	if first := getLines(content, 1); len(first) != 0 {
		return first[0]
	}
	return nil
}

// getLines splits content on '\n' and returns some of its lines as sub-slices
// of content (nothing is copied). The '\n' terminators are not part of the
// returned lines; any '\r' preceding them is kept.
//
// For n >= 0 it returns up to the first n lines in file order. A final
// newline does not produce an extra empty line.
//
// For n < 0 it returns up to the last -n lines in reverse order (last line
// first). Empty lines that begin at or after the final byte of content, i.e.
// those produced by one or two trailing newlines, are skipped and do not
// count towards the limit. The scan stops at offset 0, so an empty first line
// is never returned in this mode.
func getLines(content []byte, n int) [][]byte {
	var result [][]byte

	if n < 0 {
		limit := -n
		for pos := len(content); pos > 0 && len(result) < limit; {
			nlpos := bytes.LastIndexByte(content[:pos], '\n')
			start := nlpos + 1

			// Skip the trailing empty lines, but keep every non-empty line.
			// Testing the start offset alone would also drop a final line
			// that is one byte long and has no newline after it.
			if start < len(content)-1 || start < pos {
				result = append(result, content[start:pos])
			}
			pos = nlpos
		}
		return result
	}

	for pos := 0; pos < len(content) && len(result) < n; {
		// Without a further newline the line runs to the end of content.
		end := len(content)
		if i := bytes.IndexByte(content[pos:], '\n'); i >= 0 {
			end = pos + i
		}

		result = append(result, content[pos:end])
		pos = end + 1
	}

	return result
}

// forEachLine calls cb once for every '\n'-separated line of content, in
// order, passing the line without its terminator as a sub-slice of content.
// A final newline does not produce an extra empty line, and cb is never
// called for empty content.
func forEachLine(content []byte, cb func([]byte)) {
	for rest := content; len(rest) > 0; {
		idx := bytes.IndexByte(rest, '\n')
		if idx < 0 {
			// No further newline: the remainder is the last line.
			cb(rest)
			return
		}

		cb(rest[:idx])
		rest = rest[idx+1:]
	}
}

// countAppearancesInLine returns the total number of non-overlapping
// occurrences in line of every string in targets, summed over all targets.
func countAppearancesInLine(line []byte, targets ...string) int {
	total := 0
	for i := range targets {
		needle := []byte(targets[i])
		total += bytes.Count(line, needle)
	}
	return total
}
